abline() for vertical lines, legend() for text labels)curve(), legend())segments(), area under the curve with polygon())curve())curve())polygon(), annotations with arrows())TODO
barplot formula notation
expression
expression w/ text (paste plus space, tilde for space)
p <- 0.2
x <- 0:10
n <- 10
y <- dbinom(x, n, p)
barplot(y ~ x,
xlab = "x", ylab = expression(P(X == x)),
main = expression(paste("Binomial distribution ", X %~% ~ B(10, 0.2))))
substitute
p <- 0.2
x <- 0:10
n <- 10
y <- dbinom(x, n, p)
barplot(y ~ x,
xlab = "x", ylab = expression(P(X == x)),
main = substitute(paste("Binomial distribution ", X %~% ~ B(n, p)), list(n=n, p=p)))
abline() for vertical lines, legend() for text labels)x <- seq(0, 3, by = 0.001)
unit <- 100000
m <- 0.6
s <- 0.08
y <- dlnorm(x, log(m))
y <- y/sum(y)
mu <- sum(x * y * unit)
med <- max(x[cumsum(y) <= 0.5]) * unit
addlabel <- function(x, y, label, col, xjust = 0.05) {
legend(x, y, label, text.col = col, box.col = "white", xjust = xjust, x.intersp = 0)
}
plot(x * unit, y, type = "l",
xlab = "Income in €", ylab = "Density",
main = "A fictitious but typical income distribution")
abline(v = c(med, mu), lty = "dashed", col = c("red", "blue"))
addlabel(med, 0.001, paste("Median:", med, "€"), "red")
addlabel(mu, 0.0008, paste("Mean:", round(mu), "€"), "blue")
curve(), legend())multiple graphics
better use “stick plot” (?)
p <- 0.5
xlim <- list(
"10" = c(0, 10),
"100" = c(35, 65),
"1000" = c(450, 550)
)
lwd <- list(
"10" = 5,
"100" = 3,
"1000" = 1
)
lastn <- 0
for (n in c(10, 100, 1000, 1000)) {
x <- 0:n
y <- dbinom(x, n, p)
k <- as.character(n)
plot(x, y, type = "h", lwd = lwd[[k]],
xlab = 'x – Number of "heads"', ylab = expression(P(X == x)),
main = sprintf("Binomial distribution for %d coin flips", n),
xlim = xlim[[k]])
if (lastn == 1000) {
mu <- n*p
sigma <- n*p/sqrt(n)
f <- function(x) { dnorm(x, mean = mu, sd = sigma) }
curve(f, xlim[[k]][1], xlim[[k]][2], add = TRUE, col = "red")
legend(510, 0.025, substitute(paste("Normal distribution ", N(mu, sigma)),
list(mu=mu, sigma=round(sigma, 2))),
lty = "solid", col = "red", cex = 0.75)
}
lastn <- n
}
segments(), area under the curve with polygon())a <- 0
b <- 10
y <- 1/(b-a)
plot(c(a, b), c(y, y), xlim = c(a-1, b+1), ylim = c(0, y*1.1), pch = 16,
xlab = "Waiting time in minutes", ylab = "Density",
main = "Density function of the waiting time")
segments(a-1, 0, a, 0)
segments(a, 0, a, y, lty = "dashed")
segments(a, y, b, y)
segments(b, y, b, 0, lty = "dashed")
segments(b, 0, b+1, 0)
a <- 0
b <- 10
y <- 1/(b-a)
x1 <- 7.5
x2 <- 10
plot(c(a, b), c(y, y), xlim = c(a-1, b+1), ylim = c(0, y*1.1), pch = 16,
xlab = "Waiting time in minutes", ylab = "Density",
main = "Density function of the waiting time")
polygon(c(x1, x1, x2, x2, x1), c(0, y, y, 0, 0), col = "lightgray", border = FALSE)
text(x1 + (x2 - x1) / 2, y/2, substitute(P(a <= ~X <= b), list(a=x1, b=x2)), cex = 0.75)
segments(a-1, 0, a, 0)
segments(a, 0, a, y, lty = "dashed")
segments(a, y, b, y)
segments(b, y, b, 0, lty = "dashed")
segments(b, 0, b+1, 0)
param <- list(
c(0, 1),
c(0, 0.5),
c(0, 2),
c(2, 1),
c(-1, 0.4)
)
styles <- list(
c("solid", "darkred"),
c("solid", "coral"),
c("solid", "red"),
c("dashed", "darkred"),
c("dotted", "orange")
)
for (maxshow in 1:length(param)) {
plot(NULL, xlim = c(-5, 5), ylim = c(0, 1.0),
main = "Normal distributions",
xlab = "x",
ylab = "f(x)")
legend_labels <- character()
legend_lty <- character()
legend_col <- character()
for (i in 1:maxshow) {
p <- param[[i]]
m <- p[1]
s <- p[2]
k <- sprintf("m%.1f_s%.1f", m, s)
linest <- styles[[i]]
f <- function(x) { dnorm(x, mean = m, sd = s) }
curve(f, lwd = 3, lty = linest[1], col = linest[2], n = 1001, add = TRUE)
legend_labels <- append(legend_labels, sprintf("N(%.1f, %.1f)", m , s))
legend_lty <- append(legend_lty, linest[1])
legend_col <- append(legend_col, linest[2])
}
legend("topright", legend_labels, lty = legend_lty, col = legend_col)
}
curve())curve(dnorm, lwd = 2, n = 1001, from = -3, to = 3,
main = "Standard normal distribution N(0, 1)",
xlab = "Standard unit z",
ylab = "f(z)")
curve())f <- function(x) { dnorm(x, mean = 10, sd = 8) }
curve(f, lwd = 2, n = 1001, from = -15, to = 35,
main = "Temperature normally distrubted as N(10°C, 8°C)",
xlab = "Temperature x in °C",
ylab = "f(x)")
polygon(), annotations with arrows())x <- seq(-3.5, 3.5, length = 1001)
y <- dnorm(x)
plot(x, y, ylim = c(0, 0.6), type = "l",
xlab = "z", ylab = "f(z)", main = "Standard normal distribution N(0, 1)")
for (z in 1:3) {
iv <- x >= -z & x <= z
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#aaaaaa50", border = FALSE)
abline(v = c(-z, z), lty = "dashed", col = paste0("#000000", c("FF", "AA", "66")[z]))
y_segm <- 0.38 + 0.055 * z
arrows(-z, y_segm, z, y_segm, code = 3, length = 0.1)
text(0, y_segm, paste0(round((1-pnorm(-z) * 2) * 100, 1), "%"), adj = c(0.5, -0.2))
}
x <- seq(-3.5, 3.5, length = 1001)
y <- dnorm(x)
plot(x, y, ylim = c(0, 0.6), type = "l",
xlab = "x", ylab = "f(x)",
main = expression(paste("Normal distribution ", N(mu, sigma))),
xaxt = "n")
for (z in 1:3) {
iv <- x >= -z & x <= z
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#aaaaaa50", border = FALSE)
abline(v = c(-z, z), lty = "dashed", col = paste0("#000000", c("FF", "AA", "66")[z]))
y_segm <- 0.38 + 0.055 * z
arrows(-z, y_segm, z, y_segm, code = 3, length = 0.1)
text(0, y_segm, paste0(round((1-pnorm(-z) * 2) * 100, 1), "%"), adj = c(0.5, -0.2))
}
xticks <- -3:3
axis(1, at = -3:3, labels = expression(mu - 3 * sigma,
mu - 2 * sigma,
mu - sigma,
mu,
mu + sigma,
mu + 2 * sigma,
mu + 3 * sigma))
x <- seq(-4, 4, length = 1000)
y <- dnorm(x)
plot(x, y, type = "l",
main = "Density function f(x) of the standard normal distribution:\ndnorm(x)",
xlab = "x", ylab = "f(x)")
x <- seq(-4, 4, length = 1000)
y <- pnorm(x)
plot(x, y, type = "l",
main = "Distribution function F(x) of the standard normal distribution:\npnorm(x)",
xlab = "x", ylab = "F(x)")
x <- seq(-4, 4, length = 1000)
y <- pnorm(x)
plot(x, y, type = "l",
main = "Distribution function F(x) of the standard normal distribution:\npnorm(x)",
xlab = "x", ylab = "F(x)")
p <- 0.9
q <- qnorm(0.9)
segments(-4, p, q, p, lty = "dashed", col = "red")
segments(q, p, q, 0, lty = "dashed", col = "red")
text(-3.5, p, paste("F(x) =", p), pos = 3, col = "red")
text(q, 0, paste("For which x?"), pos = 4, col = "red")
x <- seq(0, 1, length = 1000)
y <- qnorm(x)
plot(x, y, type = "l",
main = expression(paste("Quantile function ", F^{-1} * (x), " of the standard normal distribution: qnorm(x)")),
xlab = "x", ylab = expression(F^{-1} * (x)))
set.seed(20231206)
x <- rnorm(1000)
n <- length(x)
d <- density(x)
# for each generated x_i, find the corresponding value of the density curve; this allows to generate a random
# y-coordinate that fits under the density curve
max_y <- sapply(x, function(x_i) {
d$y[min(which(x_i < d$x))]
})
plot(d, main = paste(n, "randomly sampled values from the standard normal distribution"),
xlab = "x", ylab = "Density")
points(x, y = runif(n, 0, max_y), col = rgb(0, 0, 0, 0.5))
dotchart(), math. expressions, legends)set.seed(20231024)
d <- read.table('data/miete03.asc', header = TRUE)
rent <- d$nm
mu <- mean(rent)
n <- length(rent)
hist(rent, main = "Histogram of the rent", freq = FALSE,
sub = paste("n =", n), xlab = "Rent in €", ylab = "Probability")
abline(v = mu, lty = 'dashed', col = 'red')
text(mu, 0.0017, expression(mu), adj = c(-0.5, 0), col = 'red')
n <- 30
m <- 10
hat_mu_i <- sapply(1:m, function(i) {
mean(sample(rent, n))
})
dotchart(hat_mu_i, labels = as.character(1:m), xlab = "Rent in €", ylab = "Sample",
main = substitute(
paste("Sample means ", hat(mu)[i], " for ", m, " samples of size n=", n),
list(m=m, n=n)
))
abline(v = mu, lty = 'dashed', col = 'red')
text(mu, 0.5, expression(mu), adj = c(-0.5, 0), col = 'red')
mean_hat_mu <- mean(hat_mu_i)
abline(v = mean_hat_mu, lty = 'dashed', col = 'blue')
text(mean_hat_mu, 0.5, expression(bar(hat(mu))), adj = c(-0.5, 0), col = 'blue')
n <- 30
m <- 10000
hat_mu_i <- sapply(1:m, function(i) {
mean(sample(rent, n))
})
hist(hat_mu_i, freq = FALSE, xlab = "Rent in €", ylab = "Probability",
main = substitute(
paste("Histogram of sample means for ", m, " samples of size n=", n),
list(m=m, n=n)
)
)
abline(v = mu, lty = "solid", col = 'red')
text(mu, 0, expression(mu), adj = c(-0.5, 0), col = 'red')
mean_hat_mu <- mean(hat_mu_i)
abline(v = mean_hat_mu, lty = 'dashed', col = 'blue')
text(mean_hat_mu, 0.0005, expression(bar(hat(mu))), adj = c(-0.5, 0), col = 'blue')
e <- round(abs(mean_hat_mu - mu), 2)
text(mean_hat_mu, 0.0085, expression(abs(paste(" ", bar(hat(mu)) - mu, " "))), adj = c(-1, 0))
text(mean_hat_mu, 0.0085, paste0("= ", e, "€"), adj = c(-2.2, -0.2))
m <- 10000
se_per_samplesize <- numeric()
samplesizes <- integer()
for (n in c(15, 30, 100, 1000)) {
hat_mu_i <- sapply(1:m, function(i) {
mean(sample(rent, n))
})
hist(hat_mu_i, freq = FALSE, xlab = "Rent in €", ylab = "Probability",
main = substitute(
paste("Histogram of sample means for ", m, " samples of size n=", n),
list(m=m, n=n)
),
breaks = seq(200, 940, by = 5)
)
se_per_samplesize <- c(se_per_samplesize, sd(hat_mu_i))
samplesizes <- c(samplesizes, n)
}
plot(samplesizes, se_per_samplesize, type = "p",
main = "Estimation error of the rent by sample size",
xlab = "Sample size",
ylab = "Standard error in €")
nrange <- min(samplesizes):max(samplesizes)
se_theoretic <- sd(rent) / sqrt(nrange)
lines(nrange, se_theoretic, lty = 'dashed', col = 'red')
legend("topright", c("Empirical standard error", "Theoretic standard error"),
pch = c(1, NA_integer_), lty = c(NA_integer_, 2), col = c("black", "red"))
x <- seq(-3.5, 3.5, length = 1001)
y <- dnorm(x)
plot(x, y, ylim = c(0, 0.6), type = "l",
xlab = "x", ylab = "f(x)",
main = expression(paste("Distribution of the sample mean ", bar(X) %~% ~ N(mu, sigma[bar(X)]))),
xaxt = "n")
for (z in 1:3) {
iv <- x >= -z & x <= z
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#aaaaaa50", border = FALSE)
abline(v = c(-z, z), lty = "dashed", col = paste0("#000000", c("FF", "AA", "66")[z]))
y_segm <- 0.38 + 0.055 * z
arrows(-z, y_segm, z, y_segm, code = 3, length = 0.1)
text(0, y_segm, paste0(round((1-pnorm(-z) * 2) * 100, 1), "%"), adj = c(0.5, -0.2))
}
xticks <- -3:3
axis(1, at = -3:3, labels = expression(mu - 3 * sigma[bar(X)],
mu - 2 * sigma[bar(X)],
mu - sigma[bar(X)],
mu,
mu + sigma[bar(X)],
mu + 2 * sigma[bar(X)],
mu + 3 * sigma[bar(X)]))
barplot(rep(1, 6) / 6, ylim = c(0, 1), names.arg = 1:6,
main = "Probability distribution for a fair die",
xlab = "Spots", ylab = "Probability")
set.seed(20231106)
m <- 10000
for (n in c(10, 30, 100)) {
means <- sapply(1:m, function(i) {
X <- sample(1:6, 10, replace = TRUE)
mean(X)
})
hist(means, probability = TRUE, breaks = seq(1, 6, by = 0.1),
main = paste("Distribution of the sample mean for the number of spots with n =", n),
xlab = "Sample mean for the number of spots",
ylab = "Density")
}
unit <- 100000
mean <- 0.6
x <- seq(0, 3, by = 0.01)
d <- dlnorm(x, log(mean)) * unit
d <- d[d < 100000]
hist(d, breaks = seq(0, 1.2, by = 0.1) * unit, probability = TRUE,
main = "Fictitious income distribution",
xlab = "Income in €",
ylab = "Density")
m <- 10000
for (n in c(10, 100, 1000)) {
means <- sapply(1:m, function(i) {
X <- rlnorm(n, log(mean)) * unit
mean(X)
})
means <- means[means < 250000]
hist(means, probability = TRUE, breaks = seq(0, 2.5, by = 0.02) * unit,
main = paste("Distribution of the sample mean of incomes with n =", n),
xlab = "Sample mean of incomes",
ylab = "Density")
}
set.seed(20231111)
d <- read.table('data/miete03.asc', header = TRUE)
rent <- d$nm
mu <- mean(rent)
sigma <- sqrt(mean((mu-rent)^2))
m <- 100 # number of draws
n <- 30 # sample size
sample_conf_interval <- function(i, data, n, sigma, z) {
X <- sample(data, size = n)
Xbar <- mean(X)
E <- z * sigma/sqrt(n)
c(Xbar - E, Xbar + E)
}
for (gamma in c(0.8, 0.95, 0.99)) { # confidence levels
alpha <- 1 - gamma # significance level
z <- qnorm(1-alpha/2)
conf_ints <- t(sapply(1:m, sample_conf_interval, rent, n, sigma, z))
plot(NULL, xlim = c(300, 850), ylim = c(1, m+1),
main = sprintf("%d%%-confidence intervals for %d random samples (n=%d)\nof the rent dataset",
round(gamma*100), m, n),
xlab = "Rent in €",
ylab = "Sample draw")
abline(v = mu, col = "red", lty = "dashed")
text(mu, 100, expression(mu), col = "red", adj = c(-0.5, -0.5))
mu_covered <- conf_ints[,1] < mu & mu < conf_ints[,2]
conf_int_col <- ifelse(mu_covered, "black", "red")
segments(conf_ints[,1], 1:m, conf_ints[,2], 1:m, col = conf_int_col)
legend("topright", c(expression(paste("Confidence interval covers ", mu)),
expression(paste("Confidence interval doesn't cover ", mu))),
lty = "solid",
col = c("black", "red"))
}
curve(dnorm, lwd = 2, n = 1001, from = -5, to = 5,
lty = "dashed",
main = expression(paste(italic(t), " distributions and the standard normal distribution")),
xlab = "x",
ylab = "f(x)")
nvals <- c(2, 5, 15)
legend_labels <- c("N(0, 1)")
legend_col <- c(gray(0))
legend_lty <- c("dashed")
for (i in 1:length(nvals)) {
n <- nvals[i]
col <- gray(1-0.2*i)
f <- function(x) { dt(x, n) }
curve(f, lwd = 2, n = 1001, from = -5, to = 5, add = TRUE, col = col)
legend_labels <- c(legend_labels, sprintf("t(%d)", n))
legend_col <- c(legend_col, col)
legend_lty <- c(legend_lty, "solid")
}
legend("topright", legend_labels, col = legend_col, lty = legend_lty)
mu <- 17
X <- c(16.4, 19.4, 17.8, 16.4, 18.5, 19.0, 19.5, 17.1, 16.1)
Xbar <- mean(X)
n <- length(X)
sigma <- 1.5 / sqrt(n)
z <- qnorm(c(0.025, 0.975), mean = Xbar, sd = sigma)
confint <- paste0(round(z, 2), "cm")
plot(X, runif(n), ylim = c(-0.5, 1.5), yaxt = "n",
ylab = "", xlab = "Pencil length in cm",
main = "Random sample of pencil lengths and 95% confidence interval")
abline(h = 0.5)
abline(v = mu, lty = "dashed")
text(mu, -0.45, expression(mu), adj = -0.5)
segments(z[1], -0.1, z[2], -0.1, col = "red")
segments(z[1], -0.05, z[1], -0.15, col = "red")
segments(z[2], -0.05, z[2], -0.15, col = "red")
segments(Xbar, -0.05, Xbar, -0.15, col = "red")
text(c(z, Xbar), 0.075, c(confint, paste0(Xbar, "cm")), col = "red")
text(Xbar, -0.45, expression(bar(X)), col = "red")
X <- c(16.4, 19.4, 17.8, 16.4, 18.5, 19.0, 19.5, 17.1, 16.1)
Xbar <- mean(X)
mu <- 17
n <- length(X)
sigma <- 1.5 / sqrt(n)
z <- qnorm(c(0.025, 0.975), mean = mu, sd = sigma)
p <- 1-pnorm(Xbar, mu, sigma)
x <- seq(15, 19, length = 1001)
y <- dnorm(x, mu, sigma)
plot(x, y, type = "l", xlab = "Pencil length in cm", ylab = "f(x)",
main = expression(paste("Distribution of ", bar(X), " around ", mu, "=17cm under ", H[0],
" (", sigma, "=1.5cm and n=9)")))
iv <- x < z[1]
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#aaaaaa50", border = FALSE)
iv <- x > z[2]
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#aaaaaa50", border = FALSE)
abline(v = z, lty = "dashed", col = "#00000066")
abline(v = mu, lty = "dashed", col = "#000000DD")
text(mu, 0, expression(mu), adj = -0.25)
iv <- x >= Xbar
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#ff000050", border = FALSE)
abline(v = Xbar, lty = "dashed", col = "red")
text(Xbar, 0.15, substitute(P(bar(X) >= Xbar) == p,
list(Xbar=Xbar, p=sprintf("%.1f%%", p*100))),
adj = -0.02, col = "red")
text(c(mu, z[2]), 0.035, c("95%", "2.5%"), adj = -0.25)
text(z[1], 0.035, "2.5%", adj = 1.25)
X <- c(16.4, 19.4, 17.8, 16.4, 18.5, 19.0, 19.5, 17.1, 16.1)
Xbar <- mean(X)
mu <- 17
n <- length(X)
sigma <- 1.5 / sqrt(n)
x <- seq(15, 19, length = 1001)
y <- dnorm(x, mu, sigma)
z <- qnorm(c(0.025, 0.975), mean = Xbar, sd = sigma)
konfint <- paste0(round(z, 2), "cm")
plot(x, y, type = "l", xlab = "Pencil length in cm", ylab = "f(x)",
main = expression(paste("Distribution of ", bar(X), " around ", mu, "=17cm under ", H[0],
" (", sigma, "=1.5cm and n=9)")))
abline(v = mu, lty = "dashed")
text(mu, 0.1, expression(mu), adj = -0.5)
segments(z[1], -0.015, z[2], -0.015, col = "red")
segments(z[1], -0.01, z[1], -0.02, col = "red")
segments(z[2], -0.01, z[2], -0.02, col = "red")
segments(Xbar, -0.01, Xbar, -0.02, col = "red")
text(c(z, Xbar), 0.025, c(konfint, paste0(Xbar, "cm")), col = "red")
text(Xbar, 0.07, expression(bar(X)), col = "red")
x <- seq(-4, 4, length = 1001)
y <- dnorm(x)
plot(x, y, type = "l", xlab = "", ylab = "Density",
main = expression(paste("One-sided test: Alternative hypothesis is ", mu < mu[0])),
xaxt = "n")
p <- 0.3
z <- qnorm(p)
abline(v = 0, lty = "dashed")
segments(z, 0, z, dnorm(z), lty = "dashed")
axis(1, c(0, z), labels = c(expression(mu[0]), expression(bar(x))))
iv <- x < z
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#aaaaaa50", border = FALSE)
text(-1.5, 0.01, expression(P(bar(X) <= bar(x))))
plot(x, y, type = "l", xlab = "", ylab = "Density",
main = expression(paste("One-sided test: Alternative hypothesis is ", mu > mu[0])),
xaxt = "n")
abline(v = 0, lty = "dashed")
segments(z, 0, z, dnorm(z), lty = "dashed")
axis(1, c(0, z), labels = c(expression(mu[0]), expression(bar(x))))
iv <- x > z
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#aaaaaa50", border = FALSE)
text(1.3, 0.01, expression(P(bar(X) >= bar(x))))
x <- seq(-4, 4, length = 1001)
y <- dnorm(x)
plot(x, y, type = "l", xlab = "", ylab = "Density",
main = expression(paste("Two-sided test: Sample mean is ", bar(x) < mu[0])),
xaxt = "n")
p <- 0.3
z <- qnorm(p)
abline(v = 0, lty = "dashed")
segments(z, 0, z, dnorm(z), lty = "dashed")
axis(1, c(0, z), labels = c(expression(mu[0]), expression(bar(x))))
iv <- x < z
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#aaaaaa50", border = FALSE)
text(-1.5, 0.01, expression(P(bar(X) <= bar(x))))
x <- seq(-4, 4, length = 1001)
y <- dnorm(x)
plot(x, y, type = "l", xlab = "", ylab = "Density",
main = expression(paste("Two-sided test: Sample mean is ", bar(x) > mu[0])),
xaxt = "n")
abline(v = 0, lty = "dashed")
z <- qnorm(1-p)
segments(z, 0, z, dnorm(z), lty = "dashed")
axis(1, c(0, z), labels = c(expression(mu[0]), expression(bar(x))))
iv <- x > z
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#aaaaaa50", border = FALSE)
text(1.3, 0.01, expression(P(bar(X) >= bar(x))))
n <- 100
p <- 0.1
k <- 12
p_less_k <- round(pbinom(11, 100, 0.1), 3)
p_ge_k <- 1 - p_less_k
subst_params <- list(p_less_k=p_less_k, p_ge_k=p_ge_k)
x <- 0:100
y <- dbinom(x, n, p)
barcols <- ifelse(x >= k, "orange", "gray")
barplot(y ~ x, axis.lty = 1, col = barcols,
xlab = "Number x of waste items",
ylab = "Probability P(X=x)",
main = "Probability of number of waste items under the null hypothesis")
legend("topright",
c(expression(paste("Number of waste items " < 12)),
expression(paste("Number of waste items " >= 12))),
fill = c("gray", "orange"))
text(34, 0.04, substitute(P(X >= 12) == p, list(p = p_ge_k)), adj = 0)
arrows(33, 0.039, 20, 0.03, length = 0.1)
x <- 0:100
y <- pbinom(x, n, p)
barcols <- ifelse(x == k - 1, "red", "gray")
barplot(y, names.arg = as.character(x), axis.lty = 1,
col = barcols,
xlab = "Number x of waste items",
ylab = expression(paste("Probability ", P(X <= x))),
main = "Probability of number of waste items under the null hypothesis")
legend("topright",
expression(paste("Probability ", P(X <= 11))),
fill = "red")
pgreduced <- PlantGrowth[PlantGrowth$group != "trt1", ]
pgreduced$group <- factor(pgreduced$group)
boxplot(weight ~ group, data = pgreduced,
xlab = "Group", ylab = "Dried weight of plants in kg",
main = "Plant growth experiment")
ctrl <- pgreduced$group == "ctrl"
w_ctrl <- pgreduced$weight[ctrl]
w_trt2 <- pgreduced$weight[!ctrl]
xbar <- mean(w_trt2)
ybar <- mean(w_ctrl)
nx <- length(w_trt2)
ny <- length(w_ctrl)
se_x <- sd(w_trt2) / sqrt(nx)
se_y <- sd(w_ctrl) / sqrt(ny)
x <- seq(4, 6.5, length = 1001)
y_trt2 <- dnorm(x, mean = xbar, sd = se_x)
plot(w_trt2, runif(nx, 0, 0.1), col = "darkred", xlim = c(min(x), max(x)), ylim = c(0, 3),
xlab = "Dried weight of plants in kg", ylab = "Density",
main = "Plant growth experiment\nDistribution of sample means")
points(w_ctrl, runif(ny, 0, 0.1), col = "blue")
lines(x, y_trt2, col = "darkred")
abline(v = xbar, lty = "dashed", col = "darkred")
text(xbar, 0.15, substitute(bar(X) == xbar, list(xbar = paste0(xbar, "kg"))), pos = 4, col = "darkred")
y_ctrl <- dnorm(x, mean = ybar, sd = se_y)
lines(x, y_ctrl, col = "blue")
abline(v = ybar, lty = "dashed", col = "blue")
text(ybar, 0.15, substitute(bar(Y) == ybar, list(ybar = paste0(ybar, "kg"))), pos = 2, col = "blue")
legend("topright", c("X: treatment", "Y: control"),
lty = "solid", col = c("darkred", "blue"))
w_ttest_res <- t.test(w_trt2, w_ctrl, alternative = "greater")
se <- w_ttest_res$stderr
d <- w_trt2 - w_ctrl
d_tscale <- d / se
dbar <- mean(d)
dbar_tscale <- dbar / se
xmin <- floor(min(d_tscale)) - 1
xmax <- ceiling(max(d_tscale))
x <- seq(xmin, xmax, length.out = 1001)
y <- dt(x, w_ttest_res$parameter)
plot(x, y, type = "l", xaxt = "n",
xlab = expression(paste("Difference ", bar(X) - bar(Y), " in kg")),
ylab = "Density",
main = expression(paste("Distribution of the difference ", bar(X) - bar(Y),
" under ", H[0], ": ", mu[X] - mu[Y] <= 0)))
xaxt_labels <- seq(floor(xmin*se), ceiling(xmax*se), by = 0.5)
xaxt_labels_pos <- xaxt_labels/se
axis(1, at = xaxt_labels_pos, labels = xaxt_labels)
points(d_tscale, runif(length(d_tscale), 0, 0.01), col = "red")
abline(v = 0, lty = "dashed")
text(0, 0.025, expression(mu[X] - mu[Y] == 0), pos = 4)
abline(v = dbar_tscale, lty = "dashed", col = "red")
iv <- x >= dbar_tscale
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#ff000080", border = FALSE)
text(dbar_tscale + 0.25, 0.025, substitute(P(bar(X) - bar(Y) > dbar) == p,
list(dbar = paste0(dbar, "kg"),
p = round(w_ttest_res$p.value, 4))),
pos = 4, col = "red")
x <- seq(xmin, xmax, length.out = 1001)
y <- dt(x, w_ttest_res$parameter)
plot(x, y, type = "l",
xlab = "t",
ylab = "Density",
main = expression(paste(italic(t), " distribution under ", H[0], ": ", mu[X] - mu[Y] <= 0)))
points(d_tscale, runif(length(d), 0, 0.01), col = "red")
abline(v = 0, lty = "dashed")
text(0, 0.025, expression(mu[X] - mu[Y] == 0), pos = 4)
abline(v = dbar_tscale, lty = "dashed", col = "red")
iv <- x >= dbar_tscale
polygon(c(x[iv][1], x[iv], max(x[iv])),
c(0, y[iv], 0),
col = "#ff000080", border = FALSE)
text(dbar_tscale + 0.25, 0.025, substitute(1-P(t < tval) == pval,
list(dbar = paste0(dbar, "kg"),
tval = round(w_ttest_res$statistic, 4),
pval = round(w_ttest_res$p.value, 4))),
pos = 4, col = "red")
alpha <- 0.05
m <- 1:25
y <- 1-(1-alpha)^m
plot(m, y,
main = substitute(paste("Error rate for multiple tests with ", alpha==a), list(a=alpha)),
xlab = "Number of tests", ylab = "Probability of type I error")
lines(m, y)